# citizen forecasting 2020: a state-by-state experiment
# andreas murr & mike lewis-beck
# prepare AnesState.RData

# clear working memory
# removes every object that ls() lists in the current environment before the
# data are rebuilt; ls() does not list names that begin with a dot, so such
# hidden objects are left in place
# NOTE(review): when this script is source()d into an interactive session the
# caller's own workspace objects are deleted as well

rm(list=ls())

# load packages

library(foreign)
library(arm)

# ======
# = ts =
# ======

# read the anes cumulative data file; labelled variables are left as their
# underlying codes instead of being converted to factors

Anes <- read.dta("anes_cdfdta/anes_cdf.dta", convert.factors = FALSE)

# copy the source variables to the common names used for every study:
# year <- VCF0004, state <- VCF0901b, forecast <- VCF9028
# forecast codes: 1 = dem, 2 = rep, 3 = oth, 8 = dk, 9 = ref

Anes[["year"]] <- Anes[["VCF0004"]]
Anes[["state"]] <- Anes[["VCF0901b"]]
Anes[["forecast"]] <- Anes[["VCF9028"]]

# keep the three common variables for the selected study years only
# (1952, every fourth year from 1972 to 1996, 2004 and 2008)

study.years <- c(1952, seq(from = 1972, to = 1996, by = 4), 2004, 2008)
common.vars <- c("year", "state", "forecast")
Anes2008 <- Anes[Anes[["year"]] %in% study.years, common.vars]

# ========
# = 2012 =
# ========

# read the anes 2012 time series file; labelled variables are left as their
# underlying codes instead of being converted to factors

Anes <- read.dta("anes2012TS_dta/anes2012TS_dataset.dta", convert.factors = FALSE)

# the file covers a single study, so year is a constant

Anes[["year"]] <- 2012

# state

Anes[["state"]] <- Anes[["sample_state"]]

# forecast
# target codes: 1 = dem, 2 = rep, 3 = oth, 8 = dk, 9 = ref
# start from preswin_state and move source code k-th of code.2012 onto the
# k-th entry of code.common; every other source code is carried over as is
# each test looks at the untouched source variable, so the order of the
# recodes does not matter

code.2012 <- c(5, -8, -9)
code.common <- c(3, 8, 9)

Anes[["forecast"]] <- Anes[["preswin_state"]]
for (k in seq_along(code.2012)) {
	Anes$forecast[Anes$preswin_state == code.2012[k]] <- code.common[k]
}

# keep the three common variables

Anes2012 <- Anes[, c("year", "state", "forecast")]

# ========
# = 2016 =
# ========

# read the anes 2016 time series file; labelled variables are left as their
# underlying codes instead of being converted to factors

Anes <- read.dta("anes_timeseries_2016_dta/anes_timeseries_2016_Stata12.dta", convert.factors = FALSE)

# the file covers a single study, so year is a constant

Anes[["year"]] <- 2016

# state

Anes[["state"]] <- Anes[["V163001b"]]

# forecast
# target codes: 1 = dem, 2 = rep, 3 = oth, 8 = dk, 9 = ref
# start from V161148 and move the k-th entry of code.2016 onto the k-th entry
# of code.common; source codes not listed (including 1 and 2) are carried over
# each test looks at the untouched source variable, so the order of the
# recodes does not matter
# NOTE(review): the mapping is reproduced unchanged from the original script;
# confirm the meaning of source codes 3 to 9 against the 2016 codebook

code.2016 <- c(3, 4, 5, 6, 7, 8, 9, -8, -9)
code.common <- c(1, 2, 3, 3, 8, 8, 9, 8, 9)

Anes[["forecast"]] <- Anes[["V161148"]]
for (k in seq_along(code.2016)) {
	Anes$forecast[Anes$V161148 == code.2016[k]] <- code.common[k]
}

# keep the three common variables

Anes2016 <- Anes[, c("year", "state", "forecast")]

# =====================
# = combine anes data =
# =====================

# stack the three extracts row-wise (cumulative-file years, 2012, 2016)
Anes <- do.call(rbind, list(Anes2008, Anes2012, Anes2016))
# Anes$state = as.factor(Anes$state)
# turn the numeric forecast codes into a labelled factor; codes other than
# 1, 2, 3, 8 and 9 match no level and therefore become missing
# note that the last label is the string "NA" (code 9), not a missing value
forecast.codes <- c(1:3, 8:9)
forecast.labels <- c("d", "r", "o", "dk", "NA")
Anes[["forecast"]] <- factor(Anes[["forecast"]], levels = forecast.codes, labels = forecast.labels)

# =========
# = state =
# =========

# load the state-level presidential election results
# columns used below: year, state, electoralv, winner, total, dem, rep, ind, oth

State <- read.csv("PresidentialElectionResultByState1948to2016.csv", header = TRUE)

# make winner levels similar to forecast levels
# read.csv() only turns strings into factors by default in R < 4.0.0; on newer
# versions winner arrives as a character vector and assigning levels() to it
# would merely attach a stray attribute. coerce explicitly so that winner is a
# factor whatever the R version, then append the forecast labels it does not
# have yet (union() keeps the level set free of duplicates)

State$winner <- as.factor(State$winner)
levels(State$winner) <- union(levels(State$winner), c("o", "dk", "NA"))

# vote shares (each party's votes over the total)
# a missing ind count is treated as zero votes

State$dem.s <- with(State, dem / total)
State$rep.s <- with(State, rep / total)
State$ind <- with(State, ifelse(is.na(ind), 0, ind))
State$ind.s <- with(State, ind / total)
State$oth.s <- with(State, oth / total)

# democratic share of the two-party (dem + rep) vote

State$dem.two.s <- with(State, dem.s / (dem.s + rep.s))

# select variables (the party-specific shares computed above are not kept)

State <- State[c("year", "state", "electoralv", "winner", "dem.two.s")]

# ===================================================
# = aggregate anes data and combine with state data =
# ===================================================

# for every state-year row of State, summarise the citizen forecasts in Anes:
#   n        number of respondents forecasting "d" or "r"
#   p.dem    share of those respondents forecasting "d"
#   forecast "d" if more respondents said "d", "r" if more said "r", "t" if tied
# all three stay NA when the row's year or state does not occur in Anes at all;
# when both occur but the state-year has no "d"/"r" forecasts, n is 0 and
# p.dem and forecast stay NA (instead of the NaN that 0 / 0 would produce)

State$p.dem <- rep(NA_real_, nrow(State))
State$n <- rep(NA_integer_, nrow(State))
State$forecast <- rep(NA_character_, nrow(State))

# seq_len() gives an empty loop for an empty State, unlike 1:nrow(State)
for (i in seq_len(nrow(State))) {
	# scalar test, hence && rather than the elementwise &
	in.anes <- State$year[i] %in% Anes$year && State$state[i] %in% Anes$state
	if (!in.anes) {
		next
	}

	# respondents interviewed in this state and year
	cell <- Anes[Anes$year == State$year[i] & Anes$state == State$state[i], ]
	n.dem <- sum(cell$forecast == "d", na.rm = TRUE)
	n.rep <- sum(cell$forecast == "r", na.rm = TRUE)
	State$n[i] <- n.dem + n.rep

	# nothing to average over: leave p.dem and forecast missing
	if (State$n[i] == 0) {
		next
	}

	State$p.dem[i] <- n.dem / State$n[i]

	# comparing the counts is equivalent to comparing p.dem with .5 and avoids
	# testing a floating-point ratio for equality
	State$forecast[i] <- if (n.dem > n.rep) {
		"d"
	} else if (n.dem == n.rep) {
		"t"
	} else {
		"r"
	}
}

# =============
# = save data =
# =============

# store the merged state-year data under the name AnesState and write that
# single object to AnesState.RData
AnesState <- State
save(list = "AnesState", file = "AnesState.RData")

# ===================
# = end source code =
# ===================